#encoding:utf8

"""
  Parser functions for specified URLs.

  The returned content should contain the fields below:
    id, url, source
    title,
    content
"""

import re
import pdb
import hashlib
from decorators.dec_todo import TODO
from utils.ValueErrHandler import ValueErrHandler
from utils.tools import get_hash_id

from config import source  # 解析的源数据


# Registry of URL parsers: when a URL matches an entry's regex, the paired
# function is called to parse it. Entries are tried in list order.
parsers = [
    # (regex, parse_function)

]


@TODO(status=0)
def get_content(url, page, bs):
    """Parse a page with the first registered parser whose regex matches *url*.

    Entries of the module-level ``parsers`` list are scanned in order; each
    entry is unpacked as ``(regex, parse_function)`` and the regex is tested
    against the start of *url* with ``re.match``.

    Args:
        url: URL string used to select the parser; also passed to it.
        page: Passed through unchanged to the selected parse function.
        bs: Passed through unchanged to the selected parse function.

    Returns:
        Whatever the selected parse function returns, or ``None`` when no
        entry matches (or the matching entry's function is falsy).
    """
    # First match wins; the ``None`` default covers the "nothing matched" case.
    handler = next(
        (parse for pattern, parse in parsers if re.match(pattern, url)),
        None,
    )
    if not handler:
        return None
    return handler(url, page, bs)

